
*********************************************************
* Descriptive Statistics Tables 1, 2
*********************************************************

* Start from a clean session and load the firm panel.
clear all
set more off
use "${file}sine_panel.dta", clear

* Define the estimation sample: run the regression once and flag the
* observations it actually uses (e(sample)). Tables 1 and 2 below are
* restricted to these observations via `if sample'.
reghdfe FINV4 FEMALE NAF4_crea_d5 nf4crea ///
    AGED NATIOA1 EDUC2 EDUC3 GRANDEECO EXPERT SERIAL DIRIGA ///
    INC OBJECTIF INOV_SUM B2B GEOCLIA MOYENS_* ///
    MOTIV1 MOTIV2 MOTIV4 MOTIV3 MOTIV5 MOTIV6, ///
    absorb(apey4 dep) cluster(ape4)
generate sample = e(sample)


*********************************************************
* TABLE 1 - % WOMEN BY SECTORS
*********************************************************
* % women, by year (column percentages), on screen and exported to LaTeX
tab FEMALE YEAR if sample, col
tabout FEMALE YEAR using "${output}stat_female.tex" if sample, ///
replace cells(freq col) layout(row) format(0c 2c) style(tex)

* % women and female-dominated sectors
tab FEMALE NAF4_crea_d5 if sample, cell

* c: founder gender crossed with sector type (four groups, built from
*    FEMALE and NAF4_crea_d5); stays empty when either variable is missing.
* d: sector type only.
gen c="F in F-dominated sector" if FEMALE==1&NAF4_crea_d5==1
replace c="F in M-dominated sector" if FEMALE==1&NAF4_crea_d5==0
replace c="M in F-dominated sector" if FEMALE==0&NAF4_crea_d5==1
replace c="M in M-dominated sector" if FEMALE==0&NAF4_crea_d5==0
gen d="F-dominated sector" if NAF4_crea_d5==1
replace d="M-dominated sector" if NAF4_crea_d5==0

* On-screen checks: sector split within each gender, then for everyone.
* All three are restricted to the estimation sample so that they match the
* exported table (the pooled tabulation previously ran on the full dataset).
tab c YEAR if FEMALE==1&sample, col
tab c YEAR if FEMALE==0&sample, col
tab c YEAR if sample, col

* Export. The file is written once with `replace' (gender x sector panel) and
* the sector-only panel is appended to it. Earlier per-subgroup exports to this
* same file were removed: they were overwritten by the `replace' below and
* never reached the final output.
tabout c YEAR using "${output}stat_femalesec.tex" if sample, ///
replace cells(freq col) layout(row) f(0c 2c) style(tex) 
tabout d YEAR using "${output}stat_femalesec.tex" if sample, ///
append cells(freq col) layout(row) f(0c 2c) style(tex)


**************************************************
* TABLE 2 - ENTREPRENEUR & STARTUP CHARACTERISTICS
**************************************************
eststo clear
set more off

* Variables reported in Table 2, in row order. Defined once so that the four
* columns (all / male / female / difference) always use the same list.
* SALA3 used to appear twice in each copy of the list; it is now listed once.
* NOTE: this is a local macro, so the whole section must be run in one go.
local tab2vars ///
    AGED NATIOA1 ///
    EDUC0 EDUC1 EDUC2 EDUC3 GRANDEECO EXPERT SERIAL SUPEREXPERT SUPERSERIAL ///
    STATUS2 STATUS1 STATUS3 STATUS5 STATUS4 ///
    ENF COUPL ///
    DIRIGA DIRIG2A DIRIG3A DIRIG4A ///
    INC OBJECTIF ///
    MOTIV1 MOTIV2 MOTIV4 MOTIV3 MOTIV5 MOTIV6 MOTIV7 MOTIV0 ///
    INOV_SUM INOVP INOVF INOVM INOVOR APE2_INOV ///
    B2B GEOCLIA GEOCLIA2 GEOCLIA3 GEOCLIA1 NBCLIA1 NBCLIA2 NBCLIA3 NBCLIA4 ///
    FINVEXT VC FINV4 FINV1 FINV2 FINV6 FINV3 FINV5 ///
    CAPITAL MOYENS_2K MOYENS_4K MOYENS_8K MOYENS_16K MOYENS_40K MOYENS_80K MOYENS_160K ///
    SURV3 SURV5 ///
    SALA0 SALA1 SALA2 SALA3 SALA45 SALA610 SALA11 ///
    DIFF0 DIFF7 DIFF8 DIFF9 DIFF2 DIFF3 DIFF4 DIFF5 DIFF6 DIFF1

* all firms (not quiet: the summary is also shown on screen)
eststo all: estpost summ `tab2vars' if sample

* male-founded
eststo male: quietly estpost summ `tab2vars' if FEMALE==0 &sample

* female-founded
eststo fem: quietly estpost summ `tab2vars' if FEMALE==1 &sample

* t-stat: two-sample t-tests by founder gender, unequal variances allowed.
* NOTE(review): the sign of the reported difference is presumably
* male minus female (FEMALE==0 minus FEMALE==1) - confirm against estpost docs.
eststo diff: quietly estpost ttest `tab2vars' if sample, by(FEMALE) unequal 

* Export table (displayed in the results window; no `using' file is given).
* Column layout set by the pattern() masks over (all male fem diff):
*   count  -> male, fem
*   mean   -> all, male, fem
*   b (with stars) and t (in parentheses) -> diff
esttab all male fem diff , ///
cells("count(pattern(0 1 1 0) fmt(0)) mean(pattern(1 1 1 0) fmt(3)) b(star pattern(0 0 0 1) fmt(2)) t(pattern(0 0 0 1) par fmt(2))") ///
label star(* 0.1 ** 0.05 *** 0.01)  collabels(none) booktabs


*********************************************************
* APPENDIX TABLE - TOP SECTORS
*********************************************************
* Reload the full panel (the `sample' flag from above is not used here)
use "${file}sine_panel.dta", clear

* convert panel dataset to cross-sectional dataset:
* keep the rows where Y equals YEAR, then one row per SIREN-YEAR pair
* NOTE(review): presumably Y==YEAR marks the firm's cohort-year row - confirm
bysort SIREN YEAR: keep if Y==YEAR
duplicates drop SIREN YEAR, force

* define levels: look up the NAF hierarchy (niv1..niv5) of each firm's
* activity code, keeping master-only and matched rows
generate niv5 = APE2_sine
merge m:1 niv5 using "${file}nafniveauxrev2.dta" 
keep if _merge!=2
drop _merge

* replace the existing NAF1..NAF5 variables by the levels from the lookup
drop NAF5 NAF4 NAF3 NAF2 NAF1
rename (niv1 niv2 niv3 niv4 niv5) (NAF1 NAF2 NAF3 NAF4 NAF5)
drop if NAF4==""

* keep relevant variables
keep YEAR Y SIREN NAF* FEMALE FINV4* VC INC

* merge title industry (downloaded from INSEE website)
* The same label file is merged three times on the key NAF, which is set in
* turn to the NAF5, NAF1 and NAF4 code; after each merge the French/English
* titles are renamed with the level they describe.

* -- level 5 titles
generate NAF = NAF5
merge m:1 NAF using "${file}Nomenclatures-NAF-rev-2_anglais.dta"
keep if _merge!=2
drop _merge
rename (intitulésdelaNAFrév2français intitulésdelaNAFrév2anglais) (NAF5_lablfr NAF5_lablen)

* -- level 1 titles (codes are prefixed with "SECTION " before matching;
*    NAF1 itself keeps the prefix afterwards)
replace NAF1 = "SECTION "+NAF1
replace NAF = NAF1
merge m:1 NAF using "${file}Nomenclatures-NAF-rev-2_anglais.dta"
keep if _merge!=2
drop _merge
rename (intitulésdelaNAFrév2français intitulésdelaNAFrév2anglais) (NAF1_lablfr NAF1_lablen)

* -- level 4 titles
replace NAF = NAF4
merge m:1 NAF using "${file}Nomenclatures-NAF-rev-2_anglais.dta"
keep if _merge!=2
drop _merge
rename (intitulésdelaNAFrév2français intitulésdelaNAFrév2anglais) (NAF4_lablfr NAF4_lablen)


* top sectors at the SIC4 level (naf4)
* Works on a temporary copy of the data (preserve/restore).
preserve

* Sector totals, repeated on every row of the sector
bysort NAF4: egen inc = total(INC)
bysort NAF4: egen ee  = total(FINV4)
bysort NAF4: egen vc  = total(VC)
bysort NAF4: egen f1  = total(FEMALE)

* Sector size, then turn the totals into shares of the sector's rows
* (vc is left as a total)
bysort NAF4: generate N = _N
bysort NAF4: replace f1 = f1/_N
replace inc = inc/N
replace ee  = ee/N

* Drop sectors with fewer than 50 rows OR with no FINV4 at all,
* then keep a single row per sector
drop if N<50 | ee==0
duplicates drop NAF4, force

* Rank sectors from highest to lowest female share
gsort -f1
generate n = _n
generate t = _N

format f1 %8.3f
format N %9.0fc
format inc %9.3fc
format ee %9.3fc
replace NAF4_lablen = substr(NAF4_lablen,1,100)

* Show the 5 highest and 5 lowest sectors; export the 10 highest and 10 lowest
list n NAF4_lablen N f1 inc ee vc  if (n<=5|n>=t-4)
keep if (n<=10|n>=t-9)
keep n NAF4_lablen N f1 inc ee
order n NAF4_lablen N f1 inc ee
label var N "Observations"

outsheet using "${output}stat_female_naf4.csv", comma replace label
restore

* top sectors at the SIC1 level (naf1)
* Works on a temporary copy of the data (preserve/restore). Unlike the NAF4
* table, no sector is filtered out: every NAF1 section is listed and exported.
preserve

* Section totals, repeated on every row of the section.
* total() is the documented name of the legacy egen function sum().
* (A VC total used to be computed here as well but was never listed, kept or
* exported, so it has been removed.)
bys NAF1 : egen inc=total(INC)
bys NAF1 : egen ee=total(FINV4)
bys NAF1 : egen f1=total(FEMALE)

* Section size, then turn the totals into shares of the section's rows
bys NAF1 : replace f1=f1/_N
bys NAF1 : gen  N=_N
replace ee=ee/N
replace inc=inc/N

* One row per section, ranked from highest to lowest female share
duplicates drop NAF1, force
gsort -f1
gen n=_n
gen t=_N
format %8.3f f1
format %9.0fc N
format inc %9.3fc
format ee %9.3fc
replace NAF1_lablen=substr(NAF1_lablen,1,100)
list n NAF1_lablen N f1 inc ee 
keep n NAF1_lablen N f1 inc ee
order n NAF1_lablen N f1 inc ee
label var N "Observations"

outsheet using "${output}stat_female_naf1.csv", comma replace label
restore
